################################################################################################
#######  Volha Charnysh. 2024. Uprooted: How post-WWII Population Transfers Remade Europe. CUP
#######  Chapter 8: Code for replicating analysis for Bavaria at the municipal level 
######## R version 3.6.3 (2020-02-29) -- "Holding the Windsock"
######## Platform: x86_64-apple-darwin15.6.0 (64-bit)
####################################################################################

rm(list = ls())
library(dplyr)
library(openxlsx)
library(gplots) #for plotmeans
library(sandwich)
library(lme4)
library(lmtest)
library(texreg)
library(ggplot2)
library(arm) 
library(reldist) #for gini
library(xtable)
library(multiwayvcov)
library(stargazer)
library(tidyr) #for creating panel dataset

##############################################################
####### READ IN DATASET AND COMPUTE KEY VARIABLES ############
##############################################################

dat<-read.xlsx("chapter8/bayern_1939_2011_wf_dist.xlsx")
head(dat)

dat$pmale_50<-1-dat$pop1950female/dat$pop1950
dat$srefugees1950<-dat$prefugees1950/100
dat$SelEmpl39<-dat$semployedfreeprof1939/(dat$semployedfreeprof1939+dat$nsuppfamilymembers1939+dat$civilservants1939+dat$employees1939+dat$laborers1939)


dat$CivServPT39<-dat$wpservantsservices1939/(dat$pop1950census/1000) #Offentlicher Diesnt Erwerbspersonen
dat$CivServPT50<-dat$civilservantsemployees1950/(dat$pop1950census/1000) #this is based on erwerbspersonen: Beamte & angesttellte.
dat$BeamtePT70<-dat$Beamte_Soldaten_insg_1970/(dat$Bevölkerung_insges_1970/1000)
dat$BeamtePT87<-dat$Beamte_Soldaten_insg_1987/(dat$Bevölkerung_insges_1987/1000)


#### agricultural employment : 
dat$InAgric_50<-dat$wagriforestry1950/dat$nactive1950
dat$InAgric_70<-dat$LForstwirtschaft_insg_1970/dat$Erwerbstätige_insg_1970
dat$InAgric_87<-dat$LForstwirtschaft_insg_1987/dat$Erwerbstätige_insg_1987

#### industrial employment : 
dat$InManuf70 <-dat$Produzierenes_Gewerbe_insg_1970/dat$Erwerbstätige_insg_1970
dat$InManuf87 <-dat$Produzierenes_Gewerbe_insg_1987/dat$Erwerbstätige_insg_1987
dat$HandelVerkehr70 <-dat$Handel_Verkehr_insg_1970/dat$Erwerbstätige_insg_1970
dat$HandelVerkehr87 <-dat$Handel_Verkehr_insg_1987/dat$Erwerbstätige_insg_1987

### Self-employed : 
dat$SelbstStandige_50<-dat$nselfemployed1950/dat$nactive1950
dat$SelbstStandige_70<-dat$Selbständige_insg_1970/dat$Erwerbstätige_insg_1970
dat$SelbstStandige_87<-dat$Selbständige_insg_1987/dat$Erwerbstätige_insg_1987

dat$SelbstStandige_39pt<-dat$semployedfreeprof1939/(dat$pop1939/1000)
dat$SelbstStandige_50pt<-dat$nselfemployed1950/(dat$pop1950census/1000)
dat$SelbstStandige_70pt<-dat$Selbständige_insg_1970/(dat$Bevölkerung_insges_1970/1000)
dat$SelbstStandige_87pt<-dat$Selbständige_insg_1987/(dat$Bevölkerung_insges_1987/1000)


##education in 1987 (1970 still needs to be integrated)
dat$HochSchuleShare1987 <-dat$Hochsch_Schulabschluss_insg_1987/dat$Aged18_65_1987
dat$HochSchuleShareW1987 <-dat$Hochsch_SchulabschlussW_1987/dat$Aged18_65_W_1987
dat$HochSchulePT1987 <-dat$Hochsch_Schulabschluss_insg_1987/(dat$Aged18_65_1987/1000)

dat$pfemale_39<-1-dat$pop_male1939/dat$pop1939
dat$pfemale_50<-dat$pop1950female/dat$pop1950
dat$pfemale_70<-1-dat$Bevölkerung_weib_1970/dat$Bevölkerung_insges_1970
dat$pfemale_87<-1-dat$Bevölkerung_weib_1987/dat$Bevölkerung_insges_1987


dat$Lnpop1939<-log(dat$pop1939)
dat$LndistBorderEastKM<-log(dat$distBorderEastKM)

dat$stadt<-ifelse(dat$Kreis=="Kreisfreie Stadt" | dat$Kreis=="Kreisfreie Städte", 1, 0)

#Gini coefficient:
gini.res <- c()
for (i in 1:nrow(dat)) {
  gini.res[i] <- gini(x = c(1, 12.5, 35, 50), 
                      weights = as.numeric(as.vector(dat[i, c("nfarms2bis5ha1950", "nfarms5bis20ha1950", 
                                                              "nfarms20bis50ha1950", "nfarms50ha1950")])))
}
dat$land.gini1950 <- gini.res

###############################################################################
########  Table A.3. Descriptive statistics for Bavarian municipalities #######
###############################################################################

vars<-c("srefugees1950", "pmale_50", "sagri1939", "diststation1950", "land.gini1950", "pop1939", "distBorderEastKM", "SelEmpl39","SelbstStandige_50", "SelbstStandige_70","SelbstStandige_87", "InAgric_70", "InAgric_87", "InManuf70", "InManuf87")

stargazer(dat[,which(names(dat) %in% vars) ], title="Descriptive statistics for Bavarian municipalities", digits=2,summary.stat=c("n", "mean","sd",  "min", "max"), covariate.labels=c("Population (1939)", "Distance to rail, km (1939)", "Agriculture (1939)", "Distance to Eastern border", "Male (1950)", "Share refugee (1950)", "Self-employed 1939", "Agriculture (1970)", 
                             "Agriculture (1987)","Manufacturing (1970)", "Manufacturing (1987)","Self-employed 1950", "Self-employed 1970", "Self-employed 1987", "Landholding gini (1950)"))


#################################
###### Create a panel dataset ###
#################################



keycol <- "year"
valuecol <- "SelfEmployed"
gathercols <- c("SelEmpl39", "SelbstStandige_50", "SelbstStandige_70", "SelbstStandige_87")
part1<-gather_(dat[,c("ags","SelEmpl39", "SelbstStandige_50", "SelbstStandige_70", "SelbstStandige_87")], keycol, valuecol, gathercols)

part1$year<-ifelse(part1$year=="SelEmpl39", 1939, ifelse(part1$year=="SelbstStandige_50", 1950, ifelse(part1$year=="SelbstStandige_70", 1970, 1987)))

keycol <- "year"
valuecol2 <- "ShareAgr"
gathercols2 <- c("sagri1939", "InAgric_50", "InAgric_70", "InAgric_87")
part2<-gather_(dat[,c("ags","sagri1939", "InAgric_50", "InAgric_70", "InAgric_87")], keycol, valuecol2, gathercols2)
part2$year<-ifelse(part2$year=="sagri1939", 1939, ifelse(part2$year=="InAgric_50", 1950, ifelse(part2$year=="InAgric_70", 1970, 1987)))

valuecol3 <- "ShareCath"
gathercols3 <- c("scatholics1939", "scatholics1950", "scatholics1970", "scatholics1987")
part3<-gather_(dat[,c("ags","scatholics1939", "scatholics1950", "scatholics1970", "scatholics1987")], keycol, valuecol3, gathercols3)
part3$year<-ifelse(part3$year=="scatholics1939", 1939, ifelse(part3$year=="scatholics1950", 1950, ifelse(part3$year=="scatholics1970", 1970, 1987)))

valuecol4 <- "ShareWomen"
gathercols4 <- c("pfemale_39", "pfemale_50", "pfemale_70", "pfemale_87")
part4<-gather_(dat[,c("ags","pfemale_39", "pfemale_50", "pfemale_70", "pfemale_87")], keycol, valuecol4, gathercols4)
part4$year<-ifelse(part4$year=="pfemale_39", 1939, ifelse(part4$year=="pfemale_50", 1950, ifelse(part4$year=="pfemale_70", 1970, 1987)))

valuecol5<- "Population"
gathercols5 <- c("pop1939", "pop1950census", "Bevölkerung_insges_1970", "Bevölkerung_insges_1987")
part5<-gather_(dat[,c("ags","pop1939", "pop1950census", "Bevölkerung_insges_1970", "Bevölkerung_insges_1987")], keycol, valuecol5, gathercols5)
part5$year<-ifelse(part5$year=="pop1939", 1939, ifelse(part5$year=="pop1950census", 1950, ifelse(part5$year=="Bevölkerung_insges_1970", 1970, 1987)))

valuecol6<-"SelfEmployedPt"
gathercols6<-c("SelbstStandige_39pt", "SelbstStandige_50pt", "SelbstStandige_70pt", "SelbstStandige_87pt")
part6<-gather_(dat[,c("ags","SelbstStandige_39pt", "SelbstStandige_50pt", "SelbstStandige_70pt", "SelbstStandige_87pt")], keycol, valuecol6, gathercols6)
part6$year<-ifelse(part6$year=="SelbstStandige_39pt", 1939, ifelse(part6$year=="SelbstStandige_50pt", 1950, ifelse(part6$year=="SelbstStandige_70pt", 1970, 1987)))



temp1<-merge(part1, part2, by=c("year", "ags"))
temp2<-merge(temp1, part3, by=c("year", "ags"))
temp3<-merge(temp2, part4, by=c("year", "ags"))
temp4<-merge(temp3, part5, by=c("year", "ags"))
temp5<-merge(temp4, part6, by=c("year", "ags"))

dat$LndistBorderEastKM<-log(dat$distBorderEastKM)

fin<-merge(temp5, dat[, c("ags", "srefugees1950", "pop1950female", "diststation1950", "land.gini1950", "pop1939", "distBorderEastKM", "LndistBorderEastKM", "sagri1939", "Kreis", "stadt","lat","lon")], by="ags")


head(fin)

fin$post<-ifelse(fin$year>1939, 1,0)

finAboveMedian<-subset(fin, srefugees1950 > 0.24648)
finBelowMedian<-subset(fin, srefugees1950 <= 0.24648)

########################################################################################
##### Table A.20. Share of expellees and changes in self-employment and agricultural ###
##### employment in Bavarian municipalities.                              ##############
########################################################################################


ols1<-lm(SelfEmployed~factor(year)*srefugees1950+ShareCath+ShareWomen+log(Population)+factor(ags)+factor(year)*LndistBorderEastKM+factor(year)*stadt, data=fin) 
ols1.cl <- coeftest(ols1, vcov = cluster.vcov(ols1, fin$ags))

ols2<-lm(ShareAgr~factor(year)*srefugees1950+ShareCath+ShareWomen+log(Population)+factor(ags)+factor(year)*LndistBorderEastKM+factor(year)*stadt, data=fin) 
ols2.cl <- coeftest(ols2, vcov = cluster.vcov(ols2, fin$ags))

ols3<-lm(ShareAgr~factor(year)*srefugees1950+ShareCath+ShareWomen+log(Population)+factor(ags)+factor(year)*LndistBorderEastKM+factor(year)*stadt, data=finAboveMedian) 
ols3.cl <- coeftest(ols3, vcov = cluster.vcov(ols3, finAboveMedian$ags))

ols4<-lm(ShareAgr~factor(year)*srefugees1950+ShareCath+ShareWomen+log(Population)+factor(ags)+factor(year)*LndistBorderEastKM+factor(year)*stadt, data=finBelowMedian)
ols4.cl <- coeftest(ols4, vcov = cluster.vcov(ols4, finBelowMedian$ags))

tableA.20 <- texreg(
  l = list(ols1, ols2, ols3, ols4),
  override.se = list(ols1.cl[,2],ols2.cl[,2], ols3.cl[,2], ols4.cl[,2]),
  override.pval = list(ols1.cl[,4],ols2.cl[,4],  ols3.cl[,4], ols4.cl[,4]),
  custom.model.names = c("(1)","(2)", "(3)", "(4)"),
  dcolumn = TRUE,
  booktabs = TRUE,
  stars = c(.01,.05,.1),
  omit.coef = "(Intercept)|ags",
  include.rsquared = FALSE
)

##Figure 8.2: two parts

d<-as.data.frame(ols1.cl[c("factor(year)1950:srefugees1950", "factor(year)1970:srefugees1950", "factor(year)1987:srefugees1950"),])
d$CI2.5<-d$Estimate +1.96*d[,2]
d$CI97.5<-d$Estimate-1.96*d[,2]

effects<-ggplot(d, aes(x = 1:3, y = Estimate)) + geom_point(size = 4) + geom_errorbar(aes(ymax = CI97.5, ymin = CI2.5)) +scale_x_continuous(breaks = c(1:3), labels=c("1950", "1970","1987"))+labs(x = "", y = "Coefficient on share expellees", colour = "", shape = "", linetype = "")+theme_minimal()
figure8.2a<-effects+geom_hline(yintercept=0, linetype="dashed", color = "red") +ggtitle("Change in self-employment rates from 1939 in Bavarian municipalities")

figure8.2a
ggsave("Figure8.2aBavaria.jpg", figure8.2a, width = 8, height = 6)

####
d2<-as.data.frame(ols2.cl[c("factor(year)1950:srefugees1950", "factor(year)1970:srefugees1950", "factor(year)1987:srefugees1950"),])
d2$CI2.5<-d2$Estimate +1.96*d2[,2]
d2$CI97.5<-d2$Estimate-1.96*d2[,2]

effects2<-ggplot(d2, aes(x = 1:3, y = Estimate)) + geom_point(size = 4) + geom_errorbar(aes(ymax = CI97.5, ymin = CI2.5)) +scale_x_continuous(breaks = c(1:3), labels=c("1950", "1970","1987"))+labs(x = "", y = "Coefficient on share expellees", colour = "", shape = "", linetype = "")+theme_minimal()
figure8.2b<-effects2+geom_hline(yintercept=0, linetype="dashed", color = "red") +ggtitle("Change in agricultural employment from 1939 in Bavarian municipalities")

figure8.2b

ggsave("Figure8.2bBavaria.jpg", figure8.2b, width = 8, height = 6)

